# delimit ;

* Set-up: start from an empty dataset, open the log and load the merged couples file;
clear;
capture log close;

* Directory that holds the input data, the log and every file saved by this script;
global raw "/disk/homedirs/frenche/research/merge";
log using "$raw/impute_mcbs.log", replace;
set more off;

use "$raw/mergecouples.dta";

* death_annualize is the divisor applied to the spending of households that die
  within a two year period. With a value of 1 we assume the household dies half way
  through the two year period. With a value of 2 all dead households are annualized
  as if they lived for the full two year period, because in the HRS we do not use
  the exact death date;
local death_annualize 2;

****************************************************************************************;
* IMPUTATION SAMPLE AND HRS-CONSISTENT COVARIATES                                       ;
****************************************************************************************;

* Basic match criteria: keep only medicaid recipients aged 65 or more;
keep if medicaidind==1 & age>=65;

* HRS-consistent health status: dead households are assigned heal equal to 1,
  which the original note describes as bad (fair or poor) health;
replace heal=1 if dead==1;

* HRS-consistent race measure: indicator for race stored as the string 2;
gen black=(race=="2");

* Age polynomial, built by repeated multiplication (age2, then age3, then age4);
gen age2=age*age;
forvalues p=3/4 {;
local q=`p'-1;
gen age`p'=age`q'*age;
};

* Use oop in MCBS, which maps onto oopd in the HRS;
* In the HRS the relevant income measure is constructed during imputation;
* Here income is floored at lowlev (non-missing values only) and then logged;
gen income_temp=income;
gen lowlev=1000;
replace income_temp=log(cond(income_temp<lowlev & income_temp~=., lowlev, income_temp));



* Education dummies for the various qualifications (mirrored in HRS);
* see Ric_1.pdf SPDEGRCV;
* Codes -8 to 3 are less than high school, 4 is high school,
  5 to 7 are some college and 8 or 9 are college;
gen lesshschool=inrange(education,-8,3);
gen hschool=(education==4);
gen somecollege=inrange(education,5,7);
gen college=inlist(education,8,9);


* Medicaid spending by marital status, before the missing values are recoded;
bysort married: sum medicaid;
by married: sum medicaid if age>75;

* Recode missing covariates: unmarried, heal equal to 1, not working;
replace workind=0 if workind==.;
replace heal=1 if heal==.;
replace married=0 if married==.;

* Same tabulation after the recode (married no longer has a missing group);
bysort married: sum medicaid;
by married: sum medicaid if age>75;

* Baseline covariates: summary statistics, then the baseline annual regression;
local basevars age year male nurshome married heal black oop income_temp workind dead
    hschool somecollege college age2;

sum medicaid `basevars';

reg medicaid `basevars';

* Service-use indicators: 1 when the matching expenditure is positive and not missing;
gen drvisit=(xexp_outpatient>0 & xexp_outpatient!=.);
gen hospstay=(xexp_inpatient>0 & xexp_inpatient!=.);
gen dentist=(xexp_dental>0 & xexp_dental!=.);

* Any doctor, hospital or dentist use;
gen drdenhosp=max(drvisit,hospstay,dentist);

* Pairwise interactions of nursing home status, death and out-of-pocket spending;
gen nursdead=nurshome*dead;
gen nursoop=nurshome*oop;
gen deadoop=dead*oop;

* Richer specification, first without and then with the interactions;
* The second regression is the one whose coefficients are stored afterwards;
local core age year male nurshome married heal black oop income_temp workind dead
    hschool somecollege college age2 age3 age4 drdenhosp;

reg medicaid `core';

reg medicaid `core' nursdead nursoop deadoop;



* Copy the coefficients of the most recent regression into constant variables;
* Creation order is kept fixed because it determines the column order of the saved file;
gen b0            = _b[_cons];
gen bage          = _b[age];
gen byear         = _b[year];
gen bmale         = _b[male];
gen bnurs_ind     = _b[nurshome];
gen bheal         = _b[heal];
gen bblack        = _b[black];
gen boopd         = _b[oop];
gen bfaminc_temp  = _b[income_temp];
gen blfpr         = _b[workind];
gen bdead         = _b[dead];
gen bnurs_inddead = _b[nursdead];
gen bnurs_indoopd = _b[nursoop];
gen bdeadoopd     = _b[deadoop];
gen bhschool      = _b[hschool];
gen bsomecollege  = _b[somecollege];
gen bcollege      = _b[college];
gen bage2         = _b[age2];
gen bage3         = _b[age3];
gen bage4         = _b[age4];
gen bmarried      = _b[married];
gen bdrdenhosp    = _b[drdenhosp];

local coefvars b0 bage byear bmale bnurs_ind bheal bblack boopd bfaminc_temp blfpr bdead
    bhschool bsomecollege bcollege bage2 bage3 bage4 bdrdenhosp bmarried
    bnurs_inddead bnurs_indoopd bdeadoopd;

* Fitted values and residuals, observations without a fitted value are dropped;
predict medicaidp, xb;
sum medicaid medicaidp;
drop if missing(medicaidp);
gen residsmt = medicaid - medicaidp;

* One-row file holding the coefficients;
preserve;
keep `coefvars';
keep if _n==1;
save $raw/impute_coeffs.dta , replace;
restore;

* Donor file: residuals and fitted values, sorted by residual;
preserve;
keep medicaidp residsmt;
gen donor=1;
sort residsmt;
save $raw/impute_donors.dta , replace;
restore;

* Clean up so the same names can be reused (medicaidp is kept for now);
drop `coefvars' residsmt;

* Annual total expenditure regression on the same covariates as the medicaid one;
local rhs age year male nurshome married heal black oop income_temp workind dead
    hschool somecollege college age2 age3 age4 drdenhosp nursdead nursoop deadoop;

reg totalexp `rhs';

* Copy the coefficients into constant variables;
* Creation order is kept fixed because it determines the column order of the saved file;
gen b0            = _b[_cons];
gen bage          = _b[age];
gen byear         = _b[year];
gen bmale         = _b[male];
gen bnurs_ind     = _b[nurshome];
gen bheal         = _b[heal];
gen bblack        = _b[black];
gen boopd         = _b[oop];
gen bfaminc_temp  = _b[income_temp];
gen blfpr         = _b[workind];
gen bdead         = _b[dead];
gen bnurs_inddead = _b[nursdead];
gen bnurs_indoopd = _b[nursoop];
gen bdeadoopd     = _b[deadoop];
gen bhschool      = _b[hschool];
gen bsomecollege  = _b[somecollege];
gen bcollege      = _b[college];
gen bage2         = _b[age2];
gen bage3         = _b[age3];
gen bage4         = _b[age4];
gen bmarried      = _b[married];
gen bdrdenhosp    = _b[drdenhosp];

local coefvars b0 bage byear bmale bnurs_ind bheal bblack boopd bfaminc_temp blfpr bdead
    bhschool bsomecollege bcollege bage2 bage3 bage4 bdrdenhosp bmarried
    bnurs_inddead bnurs_indoopd bdeadoopd;

* Fitted values and residuals, observations without a fitted value are dropped;
predict totalexpp, xb;
sum totalexp totalexpp;
drop if missing(totalexpp);
gen residsmt = totalexp - totalexpp;

* One-row file holding the coefficients;
preserve;
keep `coefvars';
keep if _n==1;
save $raw/impute_totalexp_coeffs.dta , replace;
restore;

* Donor file: residuals and fitted values, sorted by residual;
preserve;
keep totalexpp residsmt;
gen donor=1;
sort residsmt;
save $raw/impute_totalexp_donors.dta , replace;
restore;

* Remove every annual-stage fitted value, residual and coefficient variable;
drop medicaidp totalexpp residsmt `coefvars';

* Numeric household id: baseid with every G character ignored;
destring baseid, generate(hhid) ignore("G");

* check is 1 for an observation that shares hhid and year with the previous one
  in its group but carries a different baseid, the summary shows how often that occurs;
bysort hhid year: gen check=(baseid!=baseid[_n-1] & hhid==hhid[_n-1]);
sum check;
drop check;

* Declare the panel so that the F. and L. operators can be used;
xtset hhid year;

* Two-year variables consistent with the HRS. All of them are forward looking here:
  they combine the current year with the following year;
* Households alive in both years get the average of the two years;
* Households alive now and dead next year get the two year sum divided by death_annualize;
* Households already dead get the current year divided by death_annualize;
* Everything else (for example no following year observed) keeps the initial value 0;
foreach v in medicaid oop totalexp {;
gen `v'2yr=0;
replace `v'2yr=(`v'+F.`v')/2 if dead==0 & F.dead==0;
replace `v'2yr=(`v'+F.`v')/`death_annualize' if dead==0 & F.dead==1;
replace `v'2yr=`v'/`death_annualize' if dead==1;
};

* Nursing home days follow the same cases;
* NOTE(review): unlike spending, days for deaths are not divided by death_annualize
  - confirm that this is intended;
gen nurshomedays2yr=0;
replace nurshomedays2yr=(nurshomedays+F.nurshomedays)/2 if dead==0 & F.dead==0;
replace nurshomedays2yr=(nurshomedays+F.nurshomedays) if dead==0 & F.dead==1;
replace nurshomedays2yr=nurshomedays if dead==1;

* Nursing home indicator: at least 60 (non-missing) two-year nursing home days;
gen nurshome2yr=(nurshomedays2yr>=60 & nurshomedays2yr!=.);

* Two-year service-use indicators (forward looking at this stage);
* A dead household uses its own current-year indicator, a living household
  uses the larger of the current and the following year indicator;
gen drvisit2yr=0;
replace drvisit2yr=1 if dead==1 &drvisit==1;
replace drvisit2yr=max(drvisit,F.drvisit) if dead==0;


gen hospstay2yr=0;
replace hospstay2yr=1 if dead==1 &hospstay==1;
replace hospstay2yr=max(hospstay,F.hospstay) if dead==0;

gen dentist2yr=0;
replace dentist2yr=1 if dead==1 &dentist==1;
replace dentist2yr=max(dentist,F.dentist) if dead==0;

* Any doctor, hospital or dentist use over the two years;
* Fix: the result used to be written into the annual variable drdenhosp, which left
  drdenhosp2yr equal to 0 for every observation and overwrote the annual measure;
gen drdenhosp2yr=0;
replace drdenhosp2yr=max(drvisit2yr,hospstay2yr,dentist2yr);

* Because the two-year measures were created forward looking, duplicate every
  observation with dead equal to 1 so that it also has a row for the following year;
* expanddummy is 1 for the added copy and 0 otherwise;
expand =2 if dead==1, gen(expanddummy);
* Move the added copy one year forward and rebuild its age polynomial;
replace year=year+1 if expanddummy==1;
replace age=age+1 if expanddummy==1;
replace age2=(age)^2 if expanddummy==1;
replace age3=(age)^3 if expanddummy==1;
replace age4=(age)^4 if expanddummy==1;

* Re-declare the panel after the expand so that L. sees the added rows;
xtset hhid year;

*Create the (hypothetical) second year backwards looking values;
*All of these lagged variables exist if you died because of our expand command;
* Each listed two-year variable is replaced by its own one-year lag, the lag is first
  stored in a temporary variable and then copied over, and it is missing where no
  previous year exists;
local backlist medicaid totalexp oop nurshome drdenhosp nurshomedays;

foreach var in `backlist'{;
gen `var'_tmp=L.`var'2yr;
replace `var'2yr=`var'_tmp;
drop `var'_tmp;
};
*Data should be backwards looking as in HRS;

* Summary statistics and baseline regression for two-year medicaid spending;
* Fix: a bare comment marker without a terminating delimiter used to precede this
  summary, so the sum command was read as part of the comment and never ran;
sum medicaid2yr age year male nurshome2yr married heal black oop2yr income_temp workind dead hschool somecollege college age2 nurshomedays2yr;

reg medicaid2yr age year male nurshome2yr married heal black oop2yr income_temp workind dead hschool somecollege college age2 nurshomedays2yr;


* Interactions built from the (now backward looking) two-year variables;
gen nursdead2yr=nurshome2yr*dead;
gen nursoop2yr=nurshome2yr*oop2yr;
gen deadoop2yr=dead*oop2yr;

* Full two-year specification, its coefficients are the ones stored afterwards;
sum medicaid2yr age year male nurshome2yr married heal black oop2yr income_temp workind dead hschool somecollege college age2 age3 age4 drdenhosp2yr nursdead2yr nursoop2yr deadoop2yr nurshomedays2yr;

reg medicaid2yr age year male nurshome2yr married heal black oop2yr income_temp workind dead hschool somecollege college age2 age3 age4 drdenhosp2yr nursdead2yr nursoop2yr deadoop2yr nurshomedays2yr;




* Copy the coefficients of the most recent (two-year medicaid) regression into
  constant variables;
* Creation order is kept fixed because it determines the column order of the saved file;
gen b0            = _b[_cons];
gen bage          = _b[age];
gen byear         = _b[year];
gen bmale         = _b[male];
gen bnurs_ind     = _b[nurshome2yr];
gen bheal         = _b[heal];
gen bblack        = _b[black];
gen boopd         = _b[oop2yr];
gen bfaminc_temp  = _b[income_temp];
gen blfpr         = _b[workind];
gen bdead         = _b[dead];
gen bnurs_inddead = _b[nursdead2yr];
gen bnurs_indoopd = _b[nursoop2yr];
gen bdeadoopd     = _b[deadoop2yr];
gen bhschool      = _b[hschool];
gen bsomecollege  = _b[somecollege];
gen bcollege      = _b[college];
gen bage2         = _b[age2];
gen bage3         = _b[age3];
gen bage4         = _b[age4];
gen bmarried      = _b[married];
gen bdrdenhosp    = _b[drdenhosp2yr];
gen bnursing      = _b[nurshomedays2yr];

local coefvars b0 bage byear bmale bnurs_ind bheal bblack boopd bfaminc_temp blfpr bdead
    bhschool bsomecollege bcollege bage2 bage3 bage4 bdrdenhosp bmarried
    bnurs_inddead bnurs_indoopd bdeadoopd bnursing;

* Fitted values of the two-year regression;
predict medicaidp, xb;
sum medicaid2yr medicaidp;

* Drop observations for which medicaid2yr is missing, i.e. the first wave of
  observation where the backwards looking data cannot be constructed, and
  observations without a fitted value;
drop if medicaid2yr==. | medicaidp==.;

gen residsmt = medicaid2yr - medicaidp;

* One-row file holding the coefficients;
preserve;
keep `coefvars';
keep if _n==1;
save $raw/impute_coeffs2yr.dta , replace;
restore;

* Donor file: residuals and fitted values, sorted by residual;
preserve;
keep medicaidp residsmt;
gen donor=1;
sort residsmt;
save $raw/impute_donors2yr.dta , replace;
restore;

* Clean up so the same names can be reused (medicaidp is kept);
drop `coefvars' residsmt;

* Two-year total expenditure regression on the same covariates as the medicaid one;
local rhs2yr age year male nurshome2yr married heal black oop2yr income_temp workind dead
    hschool somecollege college age2 age3 age4 drdenhosp2yr nursdead2yr nursoop2yr
    deadoop2yr nurshomedays2yr;

reg totalexp2yr `rhs2yr';

* Copy the coefficients into constant variables;
* Creation order is kept fixed because it determines the column order of the saved file;
gen b0            = _b[_cons];
gen bage          = _b[age];
gen byear         = _b[year];
gen bmale         = _b[male];
gen bnurs_ind     = _b[nurshome2yr];
gen bheal         = _b[heal];
gen bblack        = _b[black];
gen boopd         = _b[oop2yr];
gen bfaminc_temp  = _b[income_temp];
gen blfpr         = _b[workind];
gen bdead         = _b[dead];
gen bnurs_inddead = _b[nursdead2yr];
gen bnurs_indoopd = _b[nursoop2yr];
gen bdeadoopd     = _b[deadoop2yr];
gen bhschool      = _b[hschool];
gen bsomecollege  = _b[somecollege];
gen bcollege      = _b[college];
gen bage2         = _b[age2];
gen bage3         = _b[age3];
gen bage4         = _b[age4];
gen bmarried      = _b[married];
gen bdrdenhosp    = _b[drdenhosp2yr];
gen bnursing      = _b[nurshomedays2yr];

local coefvars b0 bage byear bmale bnurs_ind bheal bblack boopd bfaminc_temp blfpr bdead
    bhschool bsomecollege bcollege bage2 bage3 bage4 bdrdenhosp bmarried
    bnurs_inddead bnurs_indoopd bdeadoopd bnursing;

* Fitted values and residuals, observations without a fitted value or without
  the outcome are dropped;
predict totalexpp, xb;
sum totalexp2yr totalexpp;
drop if totalexpp==. | totalexp2yr==.;
gen residsmt = totalexp2yr - totalexpp;

* One-row file holding the coefficients;
preserve;
keep `coefvars';
keep if _n==1;
save $raw/impute_totalexp_coeffs2yr.dta , replace;
restore;

* Donor file: residuals and fitted values, sorted by residual;
* This last preserve is not followed by a restore, as in the original script;
preserve;
keep totalexpp residsmt;
gen donor=1;
sort residsmt;
save $raw/impute_totalexp_donors2yr.dta , replace;

log close;



